 
# Randomisation for the study: 

# --- Setup and data loading -------------------------------------------------
# Packages are attached with library() so that a missing package stops the
# script immediately; require() only returns FALSE, which would let the script
# run on and fail later at the first zoo call.
library("rstudioapi")
# Work relative to the folder of the currently active RStudio document, so the
# Excel files below are found next to this script. This relies on the RStudio
# API and therefore only works when the script is run from RStudio.
setwd(dirname(getActiveDocumentContext()$path))
getwd()


library("readxl")
library(zoo)
library(dplyr)

# Community lists for the two districts.
cape <- read_excel("CAPE COAST Commmunity List Formatted.xlsx")
akwa <- read_excel("Communities Population Akwapim South Formatted.xlsx")

# The grouping columns are only filled on some rows of the sheets; carry the
# last non-missing value forward. na.rm = FALSE keeps leading NAs in place so
# the result always has one value per row (the default drops leading NAs and
# returns a shorter vector, which cannot be assigned back to the column).
cape$`Sub metro` <- na.locf(cape$`Sub metro`, na.rm = FALSE)
akwa$`Name of Sub District` <- na.locf(akwa$`Name of Sub District`,
                                       na.rm = FALSE)

# Force the Akwapim population column to numeric; entries that cannot be
# parsed as numbers become NA.
akwa$`2024 Population (microplanning)` <-
  as.numeric(akwa$`2024 Population (microplanning)`)

# Cape: sort communities by estimated 2024 population, largest first.
cape_sorted <- cape[order(cape$`Est 2024 Population`, decreasing = TRUE), ]

# Drop rows without a population figure. A logical mask is used instead of
# -which(is.na(...)): when there are no NAs, which() returns integer(0) and the
# negative index would then select zero rows instead of keeping all of them.
cape_sorted <- cape_sorted[!is.na(cape_sorted$`Est 2024 Population`), ]
dim(cape_sorted)

# Remove rows 143 and 142 of the sorted table.
# NOTE(review): presumably these are dropped so the row count is a multiple of
# 3 for the blocks of three built later; the positions are hard-coded and
# depend on the current contents of the input sheet - confirm.
cape_sorted <- cape_sorted[-c(143, 142), ]

# The blocking step further down needs complete triples; fail here with a
# clear message rather than later with a length-mismatch error.
if (nrow(cape_sorted) %% 3 != 0) {
  stop("cape_sorted has ", nrow(cape_sorted),
       " rows, which is not a multiple of 3", call. = FALSE)
}
# Number of blocks of three.
nrow(cape_sorted) / 3

# Akwa: order communities by 2024 microplanning population, largest first
# (rows with a missing population are placed last by order()).
akwa_sorted <- akwa[
  order(akwa[["2024 Population (microplanning)"]], decreasing = TRUE),
]
dim(akwa_sorted)
# Drop row 160 of the sorted table.
# NOTE(review): hard-coded position; presumably removed so the number of rows
# is a multiple of 3 - confirm against the current input sheet.
akwa_sorted <- akwa_sorted[-160, ]
# Number of blocks of three.
nrow(akwa_sorted) / 3

# Treatment arms; each block of three communities receives every arm once.
# NOTE(review): the second and third labels read "TBHealt..." (no "h") while
# the first is "TBHealth" - confirm whether that is intended before changing,
# since these labels are written to the output files.
list_treatments <- c(
  "TBHealth",
  "TBHealtPlus3",
  "TBHealtPlusText"
)

# Case 1: AKWA
#
# Steps:
#   1. Group the rows of akwa_sorted into blocks of three consecutive rows.
#   2. Within every block, assign the three treatments in random order.
#   3. Draw 13 blocks without replacement, with selection weights proportional
#      to each block's share of the total population, and mark their rows.

# The blocking needs complete triples; stop early with a clear message.
if (nrow(akwa_sorted) %% 3 != 0) {
  stop("akwa_sorted has ", nrow(akwa_sorted),
       " rows, which is not a multiple of 3", call. = FALSE)
}

# 1. Block id: 1, 1, 1, 2, 2, 2, ...
series_blocks <- rep(seq_len(nrow(akwa_sorted) / 3), each = 3)
akwa_sorted$block <- series_blocks

# Population total of each block, attached to every row of that block.
sums_per_block <- akwa_sorted %>%
  group_by(block) %>%
  summarize(Total_Block = sum(`2024 Population (microplanning)`))

akwa_sorted <- akwa_sorted %>%
  left_join(sums_per_block, by = "block")

# 2. One random permutation of the three treatments per block. The draws are
#    made block by block, in block order, after fixing the seed.
set.seed(123)
selected_treatments <- lapply(
  seq_len(max(akwa_sorted$block)),
  function(i) sample(list_treatments, size = 3, replace = FALSE)
)
selected_treatments
selected_treatments_vector <- unlist(selected_treatments)
selected_treatments_vector

akwa_sorted$treatment <- selected_treatments_vector

# 3. Block selection weights: block population / total population.
akwa_sorted$Total_Population <-
  sum(akwa_sorted$`2024 Population (microplanning)`)
akwa_sorted$Block_Probability <-
  akwa_sorted$Total_Block / akwa_sorted$Total_Population

# One (block, probability) pair per block, in block order. The pairs are taken
# by de-duplicating on the block id, not on the probability value: two blocks
# with the same population total have the same probability, and unique() on
# the probabilities would merge them, leaving fewer weights than blocks and
# shifting which block each weight refers to.
block_table <- akwa_sorted[!duplicated(akwa_sorted$block),
                           c("block", "Block_Probability")]
block_table$Block_Probability

# NOTE(review): the seed is reset to the same value (123) that was used for the
# treatment draw above and that the other district also uses - confirm this is
# intended.
set.seed(123)
selected_blocks <- sample(block_table$block,
                          size = 13, # used to be 12
                          replace = FALSE,
                          prob = block_table$Block_Probability)
selected_blocks

# Mark every community that belongs to a selected block with "x".
akwa_sorted$Selected <- NA
akwa_sorted$Selected[akwa_sorted$block %in% selected_blocks] <- "x"


# Case 2: CAPE
#
# Same procedure as for the first district:
#   1. Group the rows of cape_sorted into blocks of three consecutive rows.
#   2. Within every block, assign the three treatments in random order.
#   3. Draw 13 blocks without replacement, with selection weights proportional
#      to each block's share of the total population, and mark their rows.

# The blocking needs complete triples; stop early with a clear message.
if (nrow(cape_sorted) %% 3 != 0) {
  stop("cape_sorted has ", nrow(cape_sorted),
       " rows, which is not a multiple of 3", call. = FALSE)
}

# 1. Block id: 1, 1, 1, 2, 2, 2, ...
series_blocks <- rep(seq_len(nrow(cape_sorted) / 3), each = 3)
cape_sorted$block <- series_blocks

# Population total of each block, attached to every row of that block.
sums_per_block <- cape_sorted %>%
  group_by(block) %>%
  summarize(Total_Block = sum(`Est 2024 Population`))

cape_sorted <- cape_sorted %>%
  left_join(sums_per_block, by = "block")

# 2. One random permutation of the three treatments per block. The draws are
#    made block by block, in block order, after fixing the seed.
set.seed(123)
selected_treatments <- lapply(
  seq_len(max(cape_sorted$block)),
  function(i) sample(list_treatments, size = 3, replace = FALSE)
)
selected_treatments
selected_treatments_vector <- unlist(selected_treatments)
selected_treatments_vector

cape_sorted$treatment <- selected_treatments_vector

# 3. Block selection weights: block population / total population.
cape_sorted$Total_Population <- sum(cape_sorted$`Est 2024 Population`)
cape_sorted$Block_Probability <-
  cape_sorted$Total_Block / cape_sorted$Total_Population

# One (block, probability) pair per block, in block order. The pairs are taken
# by de-duplicating on the block id, not on the probability value: two blocks
# with the same population total have the same probability, and unique() on
# the probabilities would merge them, leaving fewer weights than blocks and
# shifting which block each weight refers to.
block_table <- cape_sorted[!duplicated(cape_sorted$block),
                           c("block", "Block_Probability")]
block_table$Block_Probability

# NOTE(review): the seed is reset to the same value (123) that was used for the
# treatment draw above and for the other district - confirm this is intended.
set.seed(123)
selected_blocks <- sample(block_table$block,
                          size = 13, # used to be 12
                          replace = FALSE,
                          prob = block_table$Block_Probability)
selected_blocks

# Mark every community that belongs to a selected block with "x".
cape_sorted$Selected <- NA
cape_sorted$Selected[cape_sorted$block %in% selected_blocks] <- "x"

# Export both randomised community tables (with block, treatment and
# selection columns) as Excel workbooks in the working directory.
library(openxlsx)
write.xlsx(x = cape_sorted, file = "240216_CAPE_sorted_13blocks.xlsx")
write.xlsx(x = akwa_sorted, file = "240216_AKWA_sorted.xlsx")


